First, import and tidy data:
Gender and neighborhood vs. HIV diagnoses
# Total HIV diagnoses per neighborhood and gender, drawn as a flipped dot plot.
# Neighborhoods are ordered with reorder(), which by default averages the
# per-gender sum_hiv values of each neighborhood.
neb_plot <- hiv_data %>%
  # Drop the "All" aggregate rows before summing. These conditions are
  # row-wise, so filtering ahead of group_by() keeps exactly the same rows.
  # NOTE(review): year is compared with "ALL" (upper case) while every other
  # column uses "All" -- confirm which spelling the data actually contains.
  filter(
    year != "ALL",
    borough != "All",
    neighborhood != "All",
    gender != "All",
    age != "All"
  ) %>%
  group_by(neighborhood, gender) %>%
  summarise(sum_hiv = sum(hiv_diagnoses)) %>%
  # summarise() only removes the last grouping level; ungroup explicitly so
  # nothing downstream runs per neighborhood by accident.
  ungroup() %>%
  ggplot(
    aes(x = reorder(neighborhood, sum_hiv), y = sum_hiv, color = gender)
  ) +
  coord_flip() +
  geom_point() +
  labs(
    title = "Gender and Neighborhood Influence on HIV Incidence",
    x = "Neighborhood",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Interactive version of the plot.
ggplotly(neb_plot)
The number of HIV diagnoses is higher among males than females in all neighborhoods. Bedford Stuyvesant - Crown Heights has the most HIV diagnoses for both men and women.
# Total HIV diagnoses per age range and gender, drawn as side-by-side bars.
age_plot <- hiv_data %>%
  # Keep rows with race and borough set to "All" and a specific age range.
  filter(race == "All", borough == "All", age != "All") %>%
  group_by(gender, age) %>%
  summarise(sum_hiv = sum(hiv_diagnoses)) %>%
  # summarise() leaves the result grouped by gender; drop that grouping.
  ungroup() %>%
  ggplot(aes(x = age, y = sum_hiv, fill = gender)) +
  geom_bar(stat = "identity", alpha = 0.8, position = position_dodge()) +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = "Gender and Age Influence on HIV Incidence",
    x = "Age range",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Interactive version of the plot.
ggplotly(age_plot)
# Total HIV diagnoses per race and gender, drawn as side-by-side bars with
# races ordered by sum_hiv (reorder() averages the per-gender values).
race_plot <- hiv_data %>%
  # Keep rows with age and borough set to "All" and a specific race.
  filter(age == "All", borough == "All", race != "All") %>%
  group_by(gender, race) %>%
  summarise(sum_hiv = sum(hiv_diagnoses)) %>%
  # summarise() leaves the result grouped by gender; drop that grouping.
  ungroup() %>%
  ggplot(aes(x = reorder(race, sum_hiv), y = sum_hiv, fill = gender)) +
  geom_bar(stat = "identity", alpha = 0.8, position = position_dodge()) +
  # Two manual colours are supplied, so gender must have at most two levels
  # in the filtered data.
  scale_fill_manual(values = c("#E69F00", "#56B4E9")) +
  labs(
    title = "Race and Gender Influence on HIV Incidence",
    x = "Race",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Interactive version of the plot.
ggplotly(race_plot)
HIV diagnoses over the years in the borough with the most HIV diagnoses
# Rank the boroughs by total HIV diagnoses, using only rows for a specific
# borough whose neighborhood, gender, age and race are all "All".
hiv_data %>%
  filter(
    borough != "All" &
      neighborhood == "All" &
      gender == "All" &
      age == "All" &
      race == "All"
  ) %>%
  group_by(borough) %>%
  summarise(sum_hiv = sum(hiv_diagnoses)) %>%
  # Largest total first.
  arrange(desc(sum_hiv))
## # A tibble: 5 x 2
## borough sum_hiv
## <chr> <int>
## 1 Brooklyn 3815
## 2 Manhattan 3536
## 3 Bronx 2736
## 4 Queens 2327
## 5 Staten Island 217
# Yearly HIV diagnoses among males aged 20 - 29 in Brooklyn, one line per
# neighborhood.
year_plot <- hiv_data %>%
  # Convert year to integer so it is plotted on a numeric axis.
  mutate(year = as.integer(year)) %>%
  # NOTE(review): neighborhood is not filtered here; if this slice contains a
  # borough-level row (neighborhood == "All") it is drawn as an extra line --
  # confirm whether that is intended.
  filter(borough == "Brooklyn", gender == "Male", age == "20 - 29") %>%
  group_by(year, neighborhood) %>%
  summarise(sum_hiv = sum(hiv_diagnoses)) %>%
  # summarise() leaves the result grouped by year; drop that grouping.
  ungroup() %>%
  ggplot(aes(x = year, y = sum_hiv, color = neighborhood)) +
  geom_line() +
  # Title and axis labels, matching the other plots in this document.
  labs(
    title = "HIV Diagnoses over Years among Males Aged 20 - 29 in Brooklyn",
    x = "Year",
    y = "HIV diagnoses"
  )

# Interactive version of the plot.
ggplotly(year_plot)
# Load the combined HIV / income table.
# NOTE(review): the 'X1' warning below says the first CSV column has no
# header -- presumably row names written when the file was saved; confirm.
hiv_income <- read_csv("./data/combine_hiv_income.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_integer(),
## uhf = col_character(),
## borough = col_character(),
## gender = col_character(),
## age = col_character(),
## race = col_character(),
## hiv_diagnosis_rate = col_double(),
## aids_diagnosis_rate = col_double(),
## plwdhi_prevalence = col_double(),
## death_rate = col_double(),
## hiv_related_death_rate = col_double(),
## non_hiv_related_death_rate = col_double()
## )
## See spec(...) for full column specifications.
# Scatter plot of mean income against total HIV diagnoses, one point per uhf
# value, using only the rows where gender, age and race are all "All".
income_plot <- hiv_income %>%
  # NOTE(review): 2011 is excluded; the reason is not shown here -- confirm.
  filter(year != 2011, gender == "All", age == "All", race == "All") %>%
  group_by(uhf) %>%
  summarise(
    sum_hiv = sum(hiv_diagnoses),
    mean_income = mean(mid_income)
  ) %>%
  ggplot(aes(x = mean_income, y = sum_hiv)) +
  geom_point() +
  labs(
    title = "Income Influence on HIV Incidence",
    x = "Average income of each neighborhood",
    y = "HIV diagnoses",
    caption = "Data from the ..."
  )

# Interactive version of the plot.
ggplotly(income_plot)
Limitations:
We cannot visualize the effect of age and race at the same time.